import json
import os
from collections import Counter
import csv
import time
import re
import numpy
from datetime import datetime
from datetime import timedelta
# ht=[ '#YearOnTikTok', '#lovestory','#artmas','#festivefashion','#happyhanukkah','#planttiktok','#productivity','#ImAGhost','#catchphrases','#ootd','#comfortfood','#holidaycrafts','#watchmegrow','#holidaydecor','#raisedby','#makeitvogue','#goodmorning','#watchingxfactor','#catchphrases','#rnbvibes','#gamenight','#HorrorToK', '#ProblemSolved', '#SkateLife', '#ThriftShop', '#SpookyTreats',  '#tiktokfood', '#interiordesign', '#sfxmakeup', '#ComingOfAge', '#falldiy', '#worldseries', '#gaminglife', '#meleaving', '#yellow', '#yougotthis','#mycostume','#mypfp','#welldone','#nativefamily','#happyhalloween','#homeoffice','#myrecommendation','#myhobby','#fanedit','#stemlife','#motivationmonday','#wip','#veteransday','#holidaycountdown','#WeekendVibes','#holidaysourway','#RoomTour','#bakingszn','#nbadraft',
#      '#nonuancenovember','#gamingsetup','#carsoftiktok','#needtoknow','#whenwewereyounger','#halloweenishere','#ourtype','#fallguysmoments','#artmas','#onlinedating','#familyrecipe','#givingthanks','#diceroll','#holidayvibes','#holidaytiktok','#recordsday','#inkdrawing','#happyholidays','#homecooked','#neonshadow',  '#youwantmore' ,'#familyimpression','#givingszn','#whereilive','foodtiktok','#holidaymusic','#howbizarre', '#personalfinance'
#      ,'#thinkingabout', '#housetour','#readysetshop','#easydiy','#coldweather','#wildanimals','#festivefashion','graphicdesign','smallbusiness','#halloweenlook','fallfashion','#artmas','#haventseen','#planttiktok','#productivity','#bekind','#onhold','#growupwithme','#tabletop','#theatrekids','#wishlist','#haventseen','#selfimprovement', '#OhNo','#animation']
# Hashtags to process. Entries are lower-cased, stripped of '#', and
# de-duplicated (first occurrence wins) right after this literal, so
# mixed-case repeats such as 'MyHaul'/'myhaul' collapse into one entry.
# The position of each tag in the de-duplicated list becomes its 1-based
# 'hashtag id' in the output table and is the key used to look the tag up in
# the ab_d7.tsv id list, so do not reorder or insert entries in the middle.
ht=['ComingOfAge', 'falldiy', 'gaminglife', 'interiordesign', 'oneyourthere', 'sfxmakeup', 'tiktokfood', 'worldseries', 'halloweenishere', 'meleaving', 'yellow', 'yougotthis', 'mycostume', 'animation', 'youwantmore', 'halloweenlook', 'mypfp', 'nativefamily', 'welldone', 'happyhalloween', 'OhNo', 'holidaytiktok', 'rnbvibes', 'carsoftiktok', 'homeoffice', 'myrecommendation', 'fallfashion', 'familyimpression', 'myhobby', 'neonshadow', 'fanedit', 'stemlife', 'motivationmonday', 'wip', 'veteransday', 'holidaycountdown', 'whereilive', 'bekind', 'onhold', 'diwali', 'WeekendVibes', 'bakingszn', 'RoomTour', 'foodtiktok', 'growupwithme', 'holidaysourway', 'tabletop', 'nbadraft', 'recordsday', 'ourtype', 'nonuancenovember', 'artmas', 'gamingsetup', 'theatrekids', 'needtoknow', 'whenwewereyounger', 'graphicdesign', 'inkdrawing', 'fallguysmoments', 'familyrecipe', 'onlinedating', 'watchmegrow', 'givingthanks', 'readysetshop', 'diceroll', 'smallbusiness', 'holidayvibes', 'coldweather', 'givingszn', 'happyholidays', 'homecooked', 'easydiy', 'wildanimals', 'holidaymusic', 'howbizarre', 'selfimprovement', 'ImAGhost', 'personalfinance', 'housetour', 'thinkingabout', 'holidaycrafts', 'happyhanukkah', 'RatatouilleMusical', 'catchphrases', 'ootd', 'comfortfood', 'holidaydecor', 'raisedby', 'goodmorning', 'makeitvogue', 'festivefashion', 'gamenight', 'artmas', 'haventseen', 'planttiktok', 'productivity', 'lovestory', 'wishlist', 'hellowinter', 'YearOnTikTok', 'nbaisback', 'perfectgifts', 'holidaytreats', 'wrappinggifts', 'feliznavidad', 'timewarpjump', 'christmas2020', 'MyHaul', 'cozyathome', 'myhaul', 'winterfit', 'withouttellingme', 'hyperfixated', 'whatilearned',  'WordsOfWisdom',  'rareaesthetic', 'welcome2021', 'dailyvlog', 'easyrecipe', 'mystyle', 'Bye2020', 'myroutine', 'gamergoals', 'projectcar', 'homemade', 'inlove', 'GreenScreenScan', 'tortillatrend', 'NFLplayoffs', 'FitnessRoutine', 'IsThisAvailable', 'wee', 'weirdpets', 'fitnessroutine',  'moneytok', 'nflplayoffs', 
'healthycooking', 'nhlfaceoff', 'groupchat', 'NHLFaceOff', 'winterfashion', 'skincare101', 'zodiacsign', 'homeimprovement', 'seashanty', 'cleantok', 'visionboard', '2021affirmations', 'mlkday', 'joblife', 'foodie', 'timewarpwaterfall', 'plantparent', 'WinterMagic', 'notaperfectperson', 'tiktoktutorial', 'cocinando', 'OlympicsCountdown', 'couplethings', 'clingypet', 'meditation101', 'winterbeautytips', 'tiktokdiy', 'typing', 'roundofapplause', 'xgamesmode', 'feelinggood', 'RoyalRumble', 'emophase', 'wintersports', 'makeblackhistory', 'relationshipstorytime', 'albumcover', 'stepbystep', 'fetapasta', 'womeninsports', 'healthyheart', 'imbusyrightnow', 'beautyhacks', 'tiktoktailgate', 'puppybowl', 'superbowllv', 'valentinesdiy', 'melaninmagic', 'coversforlovers', 'kissyourpet', 'womeninstem', 'lunarnewyear', 'perfectmatch', 'galentinesday', 'valentinesday', 'loveyourinsecurities', 'tiktokfashionmonth', 'blackcreatives', 'colddays', 'mifamilia', 'careeradvice', 'stopasianhate', 'perfectdrink', 'snowstorm', 'carhacks', 'homeproject', 'blackandproud', 'dramaticmoments', 'bakedoats', 'homecook', 'laughingduet', 'yoga101', 'somethingyoulearned', 'upcycling', 'fantheory', 'tiktokfitness', 'gamingtiktok', 'seitan', 'glasspainting', 'whenwomenwin', 'science101', 'thriftflip', 'Lifestyle', 'tiktokwildlifeday', 'dayandnight']
#ht=['ComingOfAge', 'falldiy', 'gaminglife', 'interiordesign', 'oneyourthere', 'sfxmakeup', 'tiktokfood', 'worldseries', 'halloweenishere', 'meleaving']
#ht=['makeitvogue','LGBTQHistory','wip','haventseen','holidayvibes','SkateLife','ootd','bekind','personalfinance','cozyathome','gamenight','happyholidays','ProblemSolved','veteransday','RoomTour','meleaving','yellow']
# for name in os.listdir('D:\\Work\\Tool\\tiktok\\TikToks\\'):
#     if not(name.endswith('.json')):
#         continue
#     if not(name.lower().startswith('hashtag')):
#         continue
#     fname=name.replace('.json','').split('_')
#     if fname[1].lower() not in ht:
#         ht.append(fname[1].lower())
# Canonical form of every configured hashtag: no '#', all lower-case.
hts = [raw_tag.replace('#', '').lower() for raw_tag in ht]
# Drop repeats while keeping the first occurrence of each tag in place
# (dict keys preserve insertion order).
ht = list(dict.fromkeys(hts))


# Map each snapshot date to the (lower-cased) hashtags that were flagged for
# that date. File names are split on '_'; only names with more than three
# parts carry a flag in the fourth part: either the literal 't' or an integer,
# which counts when it is greater than zero.
trendinglist = {}
for name in os.listdir('D:\\Work\\Tool\\tiktok\\TikToks\\'):
    parts = name.replace('.json', '').split('_')
    if len(parts) <= 3:
        continue
    # The 't' test comes first so int() is only attempted on non-'t' flags.
    if parts[3] == 't' or int(parts[3]) > 0:
        trendinglist.setdefault(parts[2], []).append(parts[1].lower())

# ---------------------------------------------------------------------------
# Build infos_d7.tsv: one row of features per video, per configured hashtag.
#
# For every hashtag the script scans the scraped snapshot files whose name
# mentions the hashtag, reads up to 2000 JSON records from each, derives the
# feature columns listed in _HEADER and writes the videos whose id is listed
# for that hashtag in ab_d7.tsv.
# ---------------------------------------------------------------------------

_TIKTOK_DIR = 'D:\\Work\\Tool\\tiktok\\TikToks\\'
_ELIGIBLE_IDS_PATH = 'D:\\Work\\Tool\\tiktok\\all\\ab_d7.tsv'
_OUTPUT_PATH = 'D:\\Work\\Tool\\tiktok\\all\\infos_d7.tsv'

# Snapshot files dated after this day are ignored.
_CUTOFF_DATE = datetime(2021, 4, 24)
# Only the first N records of a snapshot file are considered.
_MAX_RECORDS_PER_FILE = 2000

# Raw string: '\w' in a normal string literal is an invalid escape sequence.
# The capture group yields the bare tag, so whatever whitespace follows it
# (space, tab, newline) can never leak into the trending lookup.
_HASHTAG_RE = re.compile(r'#(\w+)')

# Caption substrings that mark a "for you page" video. The two record layouts
# were matched against different marker sets in the original script; both are
# kept verbatim.
_FYP_MARKERS = ('#fyp', '#foryourpage', '#foryoupage', '#foryou', '#fy')
_LEGACY_FYP_MARKERS = ('fyp', 'foryourpage', 'foryoupage')

_HEADER = ['hashtag id', 'id', 'INIT_IMPRESSION', 'DAY', 'IF_TRENDING', 'RANKING', 'IF_TRENDING*RANKING', 'AGE',
           'IF_TRENDING*AGE', 'NUM_HASHTAG', 'NUM_TRENDING', 'IF_FYP', 'FOLLOWING', 'FOLLOWER', 'AVG_HEART', 'VIDEO',
           'DIGG', 'video length']


def _load_eligible_ids(path):
    """Return ``{hashtag id: set of video ids}`` read from the TSV at *path*.

    The first line is a header. A video id (column 1) is kept for its hashtag
    id (column 0) only when columns 2 and 3 both parse to floats greater than
    zero. Every hashtag id that appears gets an entry, possibly empty.
    """
    eligible = {}
    with open(path, 'r', encoding='utf-8', newline='\n') as handle:
        reader = csv.reader(handle, delimiter='\t')
        next(reader)
        for line in reader:
            ids = eligible.setdefault(line[0], set())
            # The membership test stays first so rows for an id that was
            # already accepted are never re-parsed.
            if line[1] not in ids and float(line[2]) > 0 and float(line[3]) > 0:
                ids.add(line[1])
    return eligible


def _snapshot_files(hashtag, filenames, cutoff):
    """Return the names in *filenames* that belong to *hashtag*, in order.

    A name is split on '_'; part 1 is the tag and part 2 the snapshot date
    ('%Y-%m-%d', optionally followed by '.json'). Files dated after *cutoff*
    are dropped.
    """
    selected = []
    for name in filenames:
        parts = name.split('_')
        # NOTE(review): this is a substring test, so e.g. the tag 'wee' also
        # selects files whose tag is 'weekendvibes'. Kept as-is because
        # tightening it changes which rows are produced -- confirm intent.
        if hashtag not in parts[1].lower():
            continue
        snapshot_day = parts[2].replace('.json', '')
        if cutoff < datetime.strptime(snapshot_day, '%Y-%m-%d'):
            continue
        selected.append(name)
    return selected


def _trending_flag(stem):
    """Return 1 when the file name *stem* (no '.json') is flagged as trending.

    The flag is the fourth '_'-separated part: 't', or an integer in 1..10.
    """
    parts = stem.split('_')
    if len(parts) <= 3:
        return 0
    if parts[3] == 't':
        return 1
    return 1 if 0 < int(parts[3]) <= 10 else 0


def _parse_record(record):
    """Extract the layout-dependent fields of one scraped video record.

    Returns ``(video_id, created_at, duration, play_count, caption,
    fyp_markers)``, or ``None`` when the record has neither an 'id' nor an
    'itemInfos' key. ``created_at`` is a naive local-time datetime.
    """
    if 'id' in record:
        return (record['id'],
                datetime.fromtimestamp(int(record['createTime'])),
                record['video']['duration'],
                int(record['stats']['playCount']),
                record['desc'],
                _FYP_MARKERS)
    if 'itemInfos' in record:
        info = record['itemInfos']
        return (info['id'],
                datetime.fromtimestamp(int(info['createTime'])),
                info['video']['videoMeta']['duration'],
                int(info['playCount']),
                info['text'],
                _LEGACY_FYP_MARKERS)
    return None


def _age_in_days(collected_at, created_at):
    """Return the video's age in days at collection time, never negative."""
    delta = collected_at - created_at
    age = round(delta.days + delta.seconds / 3600 / 24, 4)
    return 0 if age < 0 else age


def _collect_rows(hashtag, filenames, trending_tags, tiktok_dir, cutoff):
    """Return ``{video id: feature list}`` for every usable video of *hashtag*.

    *trending_tags* maps a snapshot date to the set of tags flagged for that
    date. A video that appears in several snapshot files keeps the features
    from the first file it was seen in.
    """
    rows_by_id = {}
    for place, name in enumerate(_snapshot_files(hashtag, filenames, cutoff), start=1):
        # Everything derived from the file name is constant for the file.
        stem = name.replace('.json', '')
        snapshot_day = stem.split('_')[2]
        is_trending = _trending_flag(stem)
        collected_at = datetime.strptime(snapshot_day + ' 23:59:59', '%Y-%m-%d %H:%M:%S')
        # A date with no flagged files simply has no trending tags.
        trending_today = trending_tags.get(snapshot_day, frozenset())

        seen_in_file = set()
        ranking = 0
        with open(os.path.join(tiktok_dir, name), 'r', encoding='utf-8', newline='\n') as snapshot:
            for line in snapshot:
                record = json.loads(line)
                parsed = _parse_record(record)
                # Every record counts towards the rank, usable or not.
                ranking += 1
                if ranking > _MAX_RECORDS_PER_FILE:
                    break
                if parsed is None:
                    # Unknown layout: skip instead of reusing the previous
                    # record's values.
                    continue
                video_id, created_at, duration, play_count, caption, fyp_markers = parsed
                if video_id in seen_in_file:
                    continue
                seen_in_file.add(video_id)

                lowered = caption.lower()
                fyp = 1 if any(marker in lowered for marker in fyp_markers) else 0
                # NUM_HASHTAG counts '#' characters, not parsed tags.
                num_hashtags = caption.count('#')
                num_trending = sum(1 for tag in _HASHTAG_RE.findall(lowered) if tag in trending_today)
                age = _age_in_days(collected_at, created_at)

                author = record['authorStats']
                following = int(author['followingCount'])
                follower = int(author['followerCount'])
                heart = int(author['heartCount'])
                video_count = int(author['videoCount'])
                digg = int(author['diggCount'])
                # Non-positive counts cannot be log-transformed or divided by.
                if following <= 0 or follower <= 0 or play_count <= 0 or video_count <= 0 or digg <= 0:
                    continue
                if video_id in rows_by_id:
                    continue
                # heart is not filtered above, so a zero heart count makes
                # AVG_HEART -inf (numpy emits a RuntimeWarning).
                rows_by_id[video_id] = [
                    numpy.log(play_count), place, is_trending, ranking, is_trending * ranking,
                    age, age * is_trending, num_hashtags, num_trending, fyp,
                    following, numpy.log(follower), numpy.log(heart / video_count),
                    video_count, digg, duration,
                ]
    return rows_by_id


def main(hashtags, trending_by_date, tiktok_dir=_TIKTOK_DIR, ids_path=_ELIGIBLE_IDS_PATH,
         out_path=_OUTPUT_PATH, cutoff=_CUTOFF_DATE):
    """Write the per-video feature table for *hashtags* to *out_path*.

    Args:
        hashtags: ordered tags; the 1-based position of a tag is its
            'hashtag id' in the output and its key in the id list.
        trending_by_date: mapping of snapshot date ('%Y-%m-%d') to an
            iterable of tags flagged for that date.
        tiktok_dir: directory holding the scraped JSON-lines snapshot files.
        ids_path: TSV listing which video ids to keep per hashtag id.
        out_path: destination TSV (overwritten).
        cutoff: snapshot files dated after this datetime are ignored.

    Raises:
        KeyError: if a hashtag with at least one collected video has no
            entry in the id list.
    """
    trending_tags = {day: set(tags) for day, tags in trending_by_date.items()}
    with open(out_path, 'w', encoding='utf-8', newline='\n') as filename_output:
        writer = csv.writer(filename_output, delimiter='\t')
        writer.writerow(_HEADER)
        print(_HEADER)

        idlist = _load_eligible_ids(ids_path)
        # The directory listing does not change between hashtags.
        filenames = os.listdir(tiktok_dir)

        for htranking, hashtag in enumerate(hashtags, start=1):
            print(hashtag)
            rows_by_id = _collect_rows(hashtag, filenames, trending_tags, tiktok_dir, cutoff)
            for video_id, features in rows_by_id.items():
                if video_id not in idlist[str(htranking)]:
                    continue
                writer.writerow([htranking, video_id, *features])


if __name__ == '__main__':
    main(ht, trendinglist)

